"N. of tweets about Xinjiang"),
omit.stat = "f",
se        = list(robust_se1,robust_se2),
add.lines = list(c("Month fixed-effects", "Yes","Yes")),
column.sep.width = "-15pt",  font.size = "normalsize",digits=2)
#Note: In the interest of space, the main manuscript does not include all the month binary variables (i.e., month fixed effects).
#Open Appendix Table 1 in browser
#BROWSE() is not base R; httr is attached elsewhere in this script and exports a function of this name.
#The path is relative, so it is resolved against the current working directory.
BROWSE("appendix_table1.html")
##################
#Odds ratio plots (Figure 4 and Figure 5)
##################
# Row labels shared by both odds-ratio plots.
# The plots pair these with y positions length(boxLabels):1, so the first
# label ends up on the top row.
boxLabels <- c(
  "Civil war",
  "Muslim-majority",
  "Electoral democracy",
  "Log(GDP p.c.)",
  "UN voting with China",
  "Log(Trade share with China)",
  "Log(N. of followers)",
  "Log(N. of tweets)",
  "Log(N. of friends)",
  "N. of tweets about Xinjiang"
)
##################
#Figure 4
##################
# Print elements 2 to 11 of the f1 coefficient vector and of robust_se1 as
# comma-separated strings, ready to be pasted into the literals below.
# (f1 and robust_se1 must already exist from an earlier section.)
paste(f1$coefficients[2:11], collapse = ",") #Copy-paste coefficients below
paste(robust_se1[2:11], collapse = ",") #Copy-paste robust standard errors below
# Hand-pasted log-odds estimates for Figure 4 (one per plotted covariate).
# NOTE(review): these literals are not tied to f1 in code; re-paste them if
# the model is ever re-estimated.
coeffici <- c(
  1.11824473751921, -0.0495804582974618, -0.412533122228517,
  0.0475980970222306, -2.08792765448439, 0.0329581896697359,
  -0.149283677335546, 0.0774003372589112, -0.140597379308877,
  0.00427922545658371
)
# Matching robust standard errors, same order as coeffici.
std <- c(
  0.395304880509999, 0.24794868635087, 0.270727966098656,
  0.0933349470525806, 0.599151253367272, 0.10369202193025,
  0.0561438639732976, 0.134327418670017, 0.0796509680181931,
  0.000967280572074619
)
# Exponentiate estimate +/- 1.96 SE to get the odds ratio and its interval.
lci <- exp(coeffici - 1.96 * std) #calculate lower confidence interval
or <- exp(coeffici) #calculate odds ratio
uci <- exp(coeffici + 1.96 * std) #calculate upper confidence interval
lreg.or <- cbind(lci, or, uci)
lreg.or
# Echo each column as a comma-separated string.
paste(lreg.or[, "lci"], collapse = ",") #lci
paste(lreg.or[, "or"], collapse = ",") #or
paste(lreg.or[, "uci"], collapse = ",") #uci
# One row per covariate: y position (first label on top) plus the odds ratio
# and its interval bounds taken from lreg.or.
df <- data.frame(
  yAxis = length(boxLabels):1,
  boxCILow = lreg.or[, 1],
  boxOdds = lreg.or[, 2],
  boxCIHigh = lreg.or[, 3]
)
# Creating Figure 4
p <- ggplot(df, aes(x = boxOdds, y = yAxis))
dev.new()
# Dashed reference line at an odds ratio of 1, horizontal interval bars,
# point estimates on top; the x axis is drawn on a log10 scale.
p +
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(
    aes(xmax = boxCIHigh, xmin = boxCILow),
    size = .5, height = .2, color = "gray50"
  ) +
  geom_point(size = 3.5, color = "blue") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    text = element_text(size = 15)
  ) +
  scale_y_continuous(breaks = df$yAxis, labels = boxLabels) +
  scale_x_continuous(breaks = seq(0, 7, 1)) +
  coord_trans(x = "log10") +
  geom_text(label = "", y = 1.1, x = 2.5, color = "darkgray") +
  labs(x = "Odds ratio (log scale)", y = "", title = "")
#Save as "Figure 4.pdf"
##################
#Figure 5
##################
# Print elements 2 to 11 of the f2 coefficient vector and of robust_se2 as
# comma-separated strings, ready to be pasted into the literals below.
# (f2 and robust_se2 must already exist from an earlier section.)
paste(f2$coefficients[2:11], collapse = ",") #Copy-paste coefficients below
paste(robust_se2[2:11], collapse = ",") #Copy-paste robust standard errors below
# Hand-pasted log-odds estimates for Figure 5 (one per plotted covariate).
# NOTE(review): these literals are not tied to f2 in code; re-paste them if
# the model is ever re-estimated.
coeffici <- c(
  1.96225370886596, 1.04351783040401, 0.763628402946896,
  -0.0737706477988885, -2.36895570515148, 0.0326227182096438,
  -0.111709669410753, 0.665774335292321, -0.472524177947535,
  0.00455768187885459
)
# Matching robust standard errors, same order as coeffici.
std <- c(
  0.273584453640462, 0.195079145544509, 0.224646130937335,
  0.0735866963753981, 0.461342456076302, 0.0781038547449544,
  0.0448755710812616, 0.12188076679084, 0.0679438983052492,
  0.000630520676231299
)
# Exponentiate estimate +/- 1.96 SE to get the odds ratio and its interval.
lci <- exp(coeffici - 1.96 * std) #calculate lower confidence interval
or <- exp(coeffici) #calculate odds ratio
uci <- exp(coeffici + 1.96 * std) #calculate upper confidence interval
lreg.or <- cbind(lci, or, uci)
lreg.or
# Echo each column as a comma-separated string.
paste(lreg.or[, "lci"], collapse = ",") #lci
paste(lreg.or[, "or"], collapse = ",") #or
paste(lreg.or[, "uci"], collapse = ",") #uci
# One row per covariate: y position (first label on top) plus the odds ratio
# and its interval bounds taken from lreg.or.
df <- data.frame(
  yAxis = length(boxLabels):1,
  boxCILow = lreg.or[, 1],
  boxOdds = lreg.or[, 2],
  boxCIHigh = lreg.or[, 3]
)
# Creating Figure 5
p <- ggplot(df, aes(x = boxOdds, y = yAxis))
dev.new()
# Dashed reference line at an odds ratio of 1, horizontal interval bars,
# point estimates on top; the x axis is drawn on a log10 scale.
p +
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(
    aes(xmax = boxCIHigh, xmin = boxCILow),
    size = .5, height = .2, color = "gray50"
  ) +
  geom_point(size = 3.5, color = "blue") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    text = element_text(size = 15)
  ) +
  scale_y_continuous(breaks = df$yAxis, labels = boxLabels) +
  scale_x_continuous(breaks = seq(0, 7, 1)) +
  coord_trans(x = "log10") +
  geom_text(label = "", y = 1.1, x = 2.5, color = "darkgray") +
  labs(x = "Odds ratio (log scale)", y = "", title = "")
#Save as "Figure 5.pdf"
##################
# This file replicates the descriptive statistics in Appendix Table 2.
# 08/18/2024
##################
#Run "Replication - Main results.R"
##################
#Appendix Table 2
##################
# Columns of mydata to summarise.
# NOTE(review): muslim_pct is created in the Appendix Table 3 section
# (muslim_pct_2020 * 100); it has to exist in mydata before this runs.
vars4 <- c(
  "threat", "bene_rule",
  "ucdp_civil_war", "ucdp_bd_best",
  "muslim", "muslim_pct",
  "elec_demo", "ln_gdppc", "agree", "ln_trade_share_i",
  "ln_followers_count", "ln_statuses_count",
  "ln_friends_count", "xinjiang_num"
)
summary(mydata[vars4])
# Display labels, in the same order as vars4.
var_names <- c(
  "Threat",
  "Benevolent rule",
  "Civil war",
  "Battle-related deaths",
  "Muslim-majority (binary)",
  "Muslim (pop. %)",
  "Electoral democracy",
  "Log(GDP p.c.)",
  "UN voting with China",
  "Log(Trade share with China)",
  "Log(N. of followers)",
  "Log(N. of tweets)",
  "Log(N. of friends)",
  "N. of tweets about Xinjiang"
)
#Produce descriptive stats table (text to the console, file written to `out`)
stargazer(
  as.data.frame(mydata[vars4]),
  type = "text",
  out = "appendix_table2.html",
  digits = 2,
  title = "Summary statistics",
  covariate.labels = var_names
)
BROWSE("appendix_table2.html")
library(rworldmap)
library(tidyverse)
library(countrycode)
#Run "Replication - Main results.R"
###########################
#Figure 3: Chinese diplomatic missions using Twitter by country (Main manuscript)
###########################
# Two-column table of country and Twitter handle (i.e., screen name)
x <- as.data.frame(cbind(mydata$Country, mydata$screen_name))
names(x) <- c("country", "screen_name")
# One row per distinct (country, handle) pair
x2 <- distinct(x)
# Attach the number of distinct handles to every row of a country
x3 <- x2 %>%
  group_by(country) %>%
  mutate(count = n())
# Collapse to one row per country
x4 <- x3 %>% distinct(country, count)
x4$ccode <- countrycode(
  x4$country,
  origin = "country.name",
  destination = "iso3c"
)
#ISO3 is the character code, not numeric code
mapped_data <- joinCountryData2Map(
  x4,
  joinCode = "ISO3",
  nameJoinColumn = "ccode"
)
#Include all Chinese diplomatic missions using Twitter by country
dev.new()
par(mai = c(0, 0, 0.2, 0), xaxs = "i", yaxs = "i")
mapCountryData(
  mapped_data,
  nameColumnToPlot = "count",
  mapTitle = "",
  catMethod = 1:4,
  colourPalette = "white2Black"
)
#Save as "Figure 3.pdf"
###########################
##Appendix Figures 3 and 4
#Two maps to visualize the spatial distribution of two dependent variables, respectively.
###########################
#Create dataset for Appendix Figure 3
x <- mydata %>% dplyr::select(Country, threat, bene_rule)
# Text category where the strategy indicator equals 1, NA otherwise
x$cat_bene_rule <- ifelse(x$bene_rule == 1, "Benevolent Rule", NA)
x$cat_threat <- ifelse(x$threat == 1, "Threat", NA)
# One row per distinct (Country, threat, bene_rule) combination
x <- x %>% distinct(Country, threat, bene_rule, .keep_all = TRUE)
x$ccode <- countrycode(
  x$Country,
  origin = "country.name",
  destination = "iso3c"
)
#Only include overseas missions using the threat strategy
x <- subset(x, !is.na(cat_threat))
#ISO3 is the character code, not numeric code
mapped_data <- joinCountryData2Map(
  x,
  joinCode = "ISO3",
  nameJoinColumn = "ccode"
)
#Appendix Figure 3: Chinese diplomatic missions using the threat strategy by country
dev.new()
par(mai = c(0, 0, 0.2, 0), xaxs = "i", yaxs = "i")
mapCountryData(
  mapped_data,
  nameColumnToPlot = "cat_threat",
  mapTitle = "",
  colourPalette = "white2Black",
  catMethod = "categorical",
  mapRegion = "World",
  borderCol = "black"
)
#Save as "Appendix Figure 3.pdf"
#Create dataset for Appendix Figure 4
x <- mydata %>% dplyr::select(Country, threat, bene_rule)
# Text category where the strategy indicator equals 1, NA otherwise
x$cat_bene_rule <- ifelse(x$bene_rule == 1, "Benevolent Rule", NA)
x$cat_threat <- ifelse(x$threat == 1, "Threat", NA)
# One row per distinct (Country, threat, bene_rule) combination
x <- x %>% distinct(Country, threat, bene_rule, .keep_all = TRUE)
x$ccode <- countrycode(
  x$Country,
  origin = "country.name",
  destination = "iso3c"
)
#Only include overseas missions using the benevolent rule strategy
x <- subset(x, !is.na(cat_bene_rule))
#ISO3 is the character code, not numeric code
mapped_data <- joinCountryData2Map(
  x,
  joinCode = "ISO3",
  nameJoinColumn = "ccode"
)
#Appendix Figure 4: Chinese diplomatic missions using the benevolent rule strategy by country
dev.new()
par(mai = c(0, 0, 0.2, 0), xaxs = "i", yaxs = "i")
mapCountryData(
  mapped_data,
  nameColumnToPlot = "cat_bene_rule",
  mapTitle = "",
  colourPalette = "white2Black",
  catMethod = "categorical",
  mapRegion = "World",
  borderCol = "black"
)
#Save as "Appendix Figure 4.pdf"
library(xtable)
library(readxl)
#upload data
#Reads the workbook "Table 2.xlsx" from the current working directory.
Table_2 <- read_excel("Table 2.xlsx") #upload data
#Table 2: Top 20 terms associated with each dependent variable ordered by Fightin’ Words statistic (Monroe, et al. 2008)
#The xtable object is auto-printed at top level, which emits the LaTeX source to the console.
xtable(Table_2) # print latex
library(MASS)
library(xtable)
#remotes::install_github("vdeminstitute/vdemdata")
library(vdemdata)
library(systemfit)
#Run "Replication - Main results.R"
##################
#Appendix Table 3
##################
# Continuous share of Muslims in the host country's population:
# muslim_pct_2020 multiplied by 100.
mydata$muslim_pct <- mydata$muslim_pct_2020 * 100
head(mydata$muslim_pct)
# Model 1 (robustness check): threat regressed on the continuous
# battle-related deaths variable (ucdp_bd_best) plus the controls and
# month dummies.
f1 <- glm(
  threat ~ ucdp_bd_best +
    elec_demo + ln_gdppc +
    agree + ln_trade_share_i +
    ln_followers_count + ln_statuses_count +
    ln_friends_count + xinjiang_num + factor(month),
  data = mydata,
  family = "binomial"
)
# Model 2 (robustness check): bene_rule regressed on the continuous Muslim
# population share (muslim_pct) plus the same controls and month dummies.
f2 <- glm(
  bene_rule ~ muslim_pct +
    elec_demo + ln_gdppc +
    agree + ln_trade_share_i +
    ln_followers_count + ln_statuses_count +
    ln_friends_count + xinjiang_num + factor(month),
  data = mydata,
  family = "binomial"
)
# HC1 sandwich covariance for each model; standard errors are the square
# roots of the (absolute) diagonal.
cov1 <- sandwich::vcovHC(f1, type = "HC1")
robust_se1 <- sqrt(abs(diag(cov1)))
cov2 <- sandwich::vcovHC(f2, type = "HC1")
robust_se2 <- sqrt(abs(diag(cov2)))
#Creating Appendix Table 3: Robustness check with continuous variables for battle-related deaths and Muslim countries
# NOTE(review): covariate.labels is applied by position, and f1 and f2 do
# not share their first regressor; confirm in the rendered table that every
# label sits on the intended row.
stargazer(
  f1, f2,
  type = "text",
  out = "appexid_table3.html",
  title = "",
  dep.var.caption = "Repression image management strategies",
  dep.var.labels = c("Threat", "Benevolent rule"),
  covariate.labels = c(
    "Battle-related deaths",
    "Muslim (pop. %)",
    "Electoral democracy",
    "Log(GDP p.c.)",
    "UN voting with China",
    "Log(Trade share with China)",
    "Log(N. of followers)",
    "Log(N. of tweets)",
    "Log(N. of friends)",
    "N. of tweets about Xinjiang"
  ),
  omit.stat = "f",
  se = list(robust_se1, robust_se2),
  add.lines = list(c("Month fixed-effects", "Yes", "Yes")),
  column.sep.width = "-15pt",
  font.size = "normalsize",
  digits = 2
)
#Note: In the interest of space, the appendix does not include all the month binary variables (i.e., month fixed effects).
#Open Appendix Table 3 in browser
BROWSE("appexid_table3.html")
##################
#Appendix Table 4
##################
#This is a robustness check aggregating the dependent variables by month and using count models.
#As suggested by reviewer, I use negative binomial regressions to re-test the hypotheses
# Keep only the columns needed for the monthly models
x <- mydata %>% dplyr::select(
  "screen_name", "year", "month",
  "threat", "bene_rule",
  "ucdp_civil_war",
  "muslim", "muslim_pct",
  "elec_demo", "ln_gdppc", "agree", "ln_trade_share_i",
  "ln_followers_count", "ln_statuses_count",
  "ln_friends_count", "xinjiang_num"
)
# Collapse to one row per (screen_name, month): numeric columns are replaced
# by their group mean, then the first row of each group is kept.
# NOTE(review): grouping ignores year, so equal months of different years
# are pooled; confirm this is intended.
x <- x %>%
  group_by(screen_name, month) %>%
  mutate_if(is.numeric, mean, na.rm = TRUE)
x <- x %>% distinct(month, screen_name, .keep_all = TRUE) #remove duplicates
# Same collapse on the full data, but with group sums instead of means
x2 <- mydata %>%
  group_by(screen_name, month) %>%
  mutate_if(is.numeric, sum, na.rm = TRUE)
x3 <- x2 %>% distinct(month, screen_name, .keep_all = TRUE) #remove duplicates
# Overwrite the averaged outcomes with the monthly sums.
# This copies by row position, so it relies on x and x3 listing the
# (screen_name, month) groups in the same order.
x$threat <- x3$threat
x$bene_rule <- x3$bene_rule
x$xinjiang_num <- x3$xinjiang_num
View(x) # opens the interactive data viewer
#dependent variable is no longer a binary variable
summary(x$threat)
var(x$threat)
summary(x$bene_rule)
var(x$bene_rule)
#Conducting negative binomial model 1
f1 <- glm.nb(
  threat ~ ucdp_civil_war + muslim + elec_demo + ln_gdppc +
    agree + ln_trade_share_i +
    ln_followers_count + ln_statuses_count +
    ln_friends_count + xinjiang_num + factor(month),
  data = x
)
#Conducting negative binomial model 2
f2 <- glm.nb(
  bene_rule ~ ucdp_civil_war + muslim + elec_demo + ln_gdppc +
    agree + ln_trade_share_i +
    ln_followers_count + ln_statuses_count +
    ln_friends_count + xinjiang_num + factor(month),
  data = x
)
# HC1 sandwich covariance for each model; standard errors are the square
# roots of the (absolute) diagonal.
cov1 <- sandwich::vcovHC(f1, type = "HC1")
robust_se1 <- sqrt(abs(diag(cov1)))
cov2 <- sandwich::vcovHC(f2, type = "HC1")
robust_se2 <- sqrt(abs(diag(cov2)))
#Creating Appendix Table 4: Negative binomial regression models estimating the effects of recipient characteristics on repression image management strategies (Monthly data)
stargazer(
  f1, f2,
  type = "text",
  out = "appexid_table4.html",
  title = "",
  dep.var.caption = "Repression image management strategies",
  covariate.labels = c(
    "Civil war",
    "Muslim-majority",
    "Electoral democracy",
    "Log(GDP p.c.)",
    "UN voting with China",
    "Log(Trade share with China)",
    "Log(N. of followers)",
    "Log(N. of tweets)",
    "Log(N. of friends)",
    "N. of tweets about Xinjiang"
  ),
  dep.var.labels = c("Threat", "Benevolent rule"),
  omit.stat = "f",
  se = list(robust_se1, robust_se2),
  add.lines = list(c("Month fixed-effects", "Yes", "Yes")),
  column.sep.width = "-15pt",
  font.size = "normalsize",
  digits = 2
)
#Note: In the interest of space, the appendix does not include all the month binary variables (i.e., month fixed effects).
#Open Appendix Table 4 in browser
BROWSE("appexid_table4.html")
# Repeat the summaries of the monthly count outcomes
summary(x$threat)
var(x$threat)
summary(x$bene_rule)
var(x$bene_rule)
# Quick look at the available columns and the three identifier fields
colnames(mydata)
head(mydata$Country)
head(mydata$screen_name)
head(mydata$short_name)
# One row per distinct (Country, screen_name) pair, keeping short_name
x <- mydata %>% dplyr::select(short_name, screen_name, Country)
x <- x %>% distinct(Country, screen_name, .keep_all = TRUE) #remove duplicates
#embassy_chinese is Tonga
#Creating Appendix Table 5: Chinese diplomatic missions using Twitter by country in the dataset
xtable(x)
#As suggested by reviewer, this is a robustness check using V-Dem's Electoral democracy index rather than Freedom House's Electoral Index.
#v2x_polyarchy == Electoral democracy index (Continuous)
head(vdem$v2x_polyarchy)
table(vdem$v2x_polyarchy)
x <- vdem %>% dplyr::select(country_name, year, v2x_polyarchy) #keep relevant columns
x$COWcode <- countrycode(x$country_name, origin = "country.name", destination = "cown")
#Have to manually give code to Serbia because of Yugoslavia years
# (ifelse() with the string "345" turns the whole column into character;
# the same recode is applied to mydata below so the key types agree.)
x$COWcode <- ifelse(x$country_name == "Serbia", "345", x$COWcode)
# Subset to my time frame and drop V-Dem units that received no COW code.
# merge() treats NA keys as equal to each other, so leaving NA-coded V-Dem
# rows in would attach every one of them to any mydata row whose own
# COWcode is NA, duplicating tweets with another unit's index value.
x <- subset(x, year > 2013 & !is.na(COWcode))
mydata$COWcode <- countrycode(mydata$Country, origin = "country.name", destination = "cown")
mydata$COWcode <- ifelse(mydata$Country == "Serbia", "345", mydata$COWcode)
#Merge the two datasets (namely my data and V-Dem data)
# Left join: every mydata row is kept; rows without a V-Dem match get NA for
# v2x_polyarchy.
x <- merge.data.frame(mydata, x, by = c("COWcode", "year"), all.x = TRUE)
#model 1
f1 <- glm(threat ~ ucdp_civil_war + muslim + v2x_polyarchy +
  ln_gdppc +
  agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count +
  ln_friends_count + xinjiang_num + factor(month),
data = x, family = "binomial")
#model 2
f2 <- glm(bene_rule ~ ucdp_civil_war + muslim + v2x_polyarchy +
  ln_gdppc + agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count +
  ln_friends_count + xinjiang_num + factor(month),
data = x, family = "binomial")
#Calculating robust standard errors (HC1 sandwich estimator)
cov1 <- sandwich::vcovHC(f1, type = "HC1")
robust_se1 <- sqrt(abs(diag(cov1)))
cov2 <- sandwich::vcovHC(f2, type = "HC1")
robust_se2 <- sqrt(abs(diag(cov2)))
#Creating Appendix Table 6: Logistic regression models estimating the effects of recipient characteristics on repression image management strategies (with V-Dem’s Electoral Democracy Index)
stargazer(f1, f2, type = "text", out = "appexid_table6.html",
  title = "",
  dep.var.caption = "Repression image management strategies",
  dep.var.labels = c("Threat", "Benevolent rule"),
  covariate.labels = c("Civil war",
    "Muslim-majority",
    "Electoral regime (V-Dem)",
    "Log(GDP p.c.)",
    "UN voting with China",
    "Log(Trade share with China)",
    "Log(N. of followers)",
    "Log(N. of tweets)",
    "Log(N. of friends)",
    "N. of tweets about Xinjiang"),
  omit.stat = "f",
  se = list(robust_se1, robust_se2),
  add.lines = list(c("Month fixed-effects", "Yes", "Yes")),
  column.sep.width = "-15pt", font.size = "normalsize", digits = 2)
#Note: Since they are too long, the table does not include all the month binary variables (i.e., month fixed effects).
#Open Appendix Table 6 in browser
BROWSE("appexid_table6.html")
#As suggested by reviewer, I use seemingly unrelated regression models to re-test the hypotheses
#Defining the equations (same right-hand side for both outcomes)
r1 <- threat ~ ucdp_civil_war + muslim + elec_demo +
  ln_gdppc +
  agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count +
  ln_friends_count + xinjiang_num + factor(month)
r2 <- bene_rule ~ ucdp_civil_war + muslim + elec_demo +
  ln_gdppc + agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count +
  ln_friends_count + xinjiang_num + factor(month)
# systemfit() estimates by equation-wise OLS unless told otherwise, so the
# SUR estimator has to be requested explicitly via `method`.
# The list names become the equation labels in the output; they are named
# after the two outcomes.
fitsur <- systemfit(list(threat = r1, bene_rule = r2), method = "SUR", data = mydata)
#Creating Appendix Table 7: Seemingly unrelated regression models estimating the effects of recipient characteristics on repression image management strategies
# summary() reports the system as a whole, then each equation, and the
# covariance/correlation of the residuals across the two equations.
summary(fitsur)
#As suggested by reviewers, I run some regression models including the strategy that is not applied as an independent variable in the analyses
#Conducting logistic regression model 1 (threat, controlling for bene_rule)
f1 <- glm(threat ~ bene_rule + ucdp_civil_war + muslim + elec_demo +
  ln_gdppc +
  agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count +
  ln_friends_count + xinjiang_num + factor(month),
data = mydata, family = "binomial")
#Conducting logistic regression model 2 (bene_rule, controlling for threat)
f2 <- glm(bene_rule ~ threat + ucdp_civil_war + muslim + elec_demo +
  ln_gdppc + agree + ln_trade_share_i +
  ln_followers_count + ln_statuses_count +
  ln_friends_count + xinjiang_num + factor(month),
data = mydata, family = "binomial")
#Calculating robust standard errors (HC1 sandwich estimator)
cov1 <- sandwich::vcovHC(f1, type = "HC1")
robust_se1 <- sqrt(abs(diag(cov1)))
cov2 <- sandwich::vcovHC(f2, type = "HC1")
robust_se2 <- sqrt(abs(diag(cov2)))
#Creating Appendix Table 8: Logistic regression models estimating the effects of recipient characteristics on repression image management strategies (Controlling for diverse strategies)
# NOTE(review): covariate.labels is applied by position, and f1 and f2 do
# not share their first regressor (bene_rule vs. threat); confirm in the
# rendered table that every label sits on the intended row.
stargazer(f1, f2, type = "text", out = "appexid_table8.html",
  title = "",
  dep.var.caption = "Repression image management strategies",
  dep.var.labels = c("Threat", "Benevolent rule"),
  covariate.labels = c("Benevolent rule strategy",
    "Threat strategy",
    "Civil war",
    "Muslim-majority",
    "Electoral democracy",
    "Log(GDP p.c.)",
    "UN voting with China",
    "Log(Trade share with China)",
    "Log(N. of followers)",
    "Log(N. of tweets)",
    "Log(N. of friends)",
    "N. of tweets about Xinjiang"),
  omit.stat = "f",
  se = list(robust_se1, robust_se2),
  add.lines = list(c("Month fixed-effects", "Yes", "Yes")),
  column.sep.width = "-15pt", font.size = "normalsize", digits = 2)
#Note: Since they are too long, the table does not include all the month binary variables (i.e., month fixed effects).
#Open Appendix Table 8 in browser
# The path must equal the `out` file written by stargazer() above.
BROWSE("appexid_table8.html")
#There are a total of 2,628 tweets (3%) talking about Chinese actions in Xinjiang in the 82,110 tweets diplomatic corpus.
sum(mydata$xinjiang) # 2628 tweets
sum(mydata$xinjiang) / nrow(mydata) # 3% of total tweets
#Among these, Beijing used the threat strategy in 290 tweets (11%) and the benevolent rule strategy in 545 tweets (21%).
sum(mydata$threat) # 290 tweets
sum(mydata$threat) / sum(mydata$xinjiang) # 11% of total xinjiang tweets
# Count first, share second (mirrors the two threat lines above).
sum(mydata$bene_rule) # 545 tweets
sum(mydata$bene_rule) / sum(mydata$xinjiang) # 21% of total xinjiang tweets
#A Pearson correlation test between the threat strategy and the benevolent rule strategy yields a correlation coefficient of 0.11.
# cor.test() has no `use` argument (that belongs to cor()); it already
# restricts the test to complete pairs.
cor.test(mydata$threat, mydata$bene_rule) #correlation coefficient of 0.11
#Page 14
#Only 5 of the 88 Twitter accounts in the sample surpassed the 3,200 tweets threshold.
x <- mydata %>% dplyr::select(screen_name, statuses_count)
library(ggplot2)
library(stargazer)
library(httr)
library(tidyverse)
library(rworldmap)
library(dplyr)
library(sandwich)
# Strongly prefer fixed over scientific notation when printing numbers
options(scipen = 999)
#There are a total of 2,628 tweets (3%) talking about Chinese actions in Xinjiang in the 82,110 tweets diplomatic corpus.
sum(mydata$xinjiang) # 2628 tweets
sum(mydata$xinjiang) / nrow(mydata) # 3% of total tweets
#Among these, Beijing used the threat strategy in 290 tweets (11%) and the benevolent rule strategy in 545 tweets (21%).
sum(mydata$threat) # 290 tweets
sum(mydata$threat) / sum(mydata$xinjiang) # 11% of total xinjiang tweets
# Count first, share second (mirrors the two threat lines above).
sum(mydata$bene_rule) # 545 tweets
sum(mydata$bene_rule) / sum(mydata$xinjiang) # 21% of total xinjiang tweets
#A Pearson correlation test between the threat strategy and the benevolent rule strategy yields a correlation coefficient of 0.11.
# cor.test() has no `use` argument (that belongs to cor()); it already
# restricts the test to complete pairs.
cor.test(mydata$threat, mydata$bene_rule) #correlation coefficient of 0.11
#Page 14
#Only 5 of the 88 Twitter accounts in the sample surpassed the 3,200 tweets threshold.
x <- mydata %>% dplyr::select(screen_name, statuses_count)
x <- x %>% distinct(screen_name, statuses_count, .keep_all = TRUE) #remove duplicates
# TRUE/FALSE tally of accounts whose statuses_count exceeds 3200
table(x$statuses_count > 3200)
#ChinaEmbTurkey, CathayPak, AmbassadeChine, Chinaembmanila, and ChinaEmbOttawa
#Page 13
#Based on this study’s data, 96% of the tweets published by the Chinese Embassy in Brazil (@EmbaixadaChina) are in Portuguese.
# Tweet counts per value of the language column, whole corpus
table(mydata$lang)
# Restrict to the Chinese Embassy in Brazil (@EmbaixadaChina)
x <- subset(mydata, screen_name == "EmbaixadaChina")
# Share of that account's tweets per language ("pt" is Portuguese)
prop.table(table(x$lang))
